#!/usr/bin/env python3

from garage import wrap_experiment
from garage.envs import GymEnv, normalize
from garage.experiment.deterministic import set_seed
from garage.sampler import RaySampler
from garage.torch.algos import SHARP
from garage.torch.optimizers import OptimizerWrapper
from garage.torch.policies import GaussianMLPPolicy
from garage.torch.value_functions import GaussianMLPValueFunction
from garage.trainer import Trainer
from garage.torch.optimizers.SHARP_optimizer import SHARPOptimizer


import torch



# Step size handed to torch.optim.SGD for the policy update; the same value is
# also embedded in the experiment's log directory name.
lr = 0.5
@wrap_experiment(
    log_dir="/root/Data/aaai/halfCheetah-SGD-lr={}".format(lr),
    archive_launch_repo=False,
)
def sgd_halfCheetah(ctxt=None, seed=43):
    """Train a Gaussian MLP policy on HalfCheetah-v2 with SHARP and SGD.

    Builds a normalized ``HalfCheetah-v2`` environment, a Gaussian MLP
    policy and value function, a Ray-based sampler, and a SHARP algorithm
    whose policy optimizer is ``torch.optim.SGD`` with the module-level
    learning rate ``lr``, then runs training for 1000 epochs with a batch
    size of 10000.

    Args:
        ctxt (garage.experiment.ExperimentContext): The experiment
            configuration; it is forwarded unchanged to the Trainer.
        seed (int): Used to seed the random number generator, the wrapped
            environment and the action space to produce determinism.
    """
    # Seed first so that everything constructed below draws from the
    # seeded generators.
    set_seed(seed)
    trainer = Trainer(ctxt)

    env = normalize(GymEnv('HalfCheetah-v2'))
    # Seed the object wrapped by the normalizer as well as the action space.
    env._env.seed(seed)
    env.action_space.seed(seed)

    # The policy is created before the value function, as in the original
    # ordering, so parameter initialisation order is unchanged.
    policy = GaussianMLPPolicy(env.spec, hidden_sizes=[64, 64])
    value_function = GaussianMLPValueFunction(
        env_spec=env.spec,
        hidden_sizes=[32, 32],
        hidden_nonlinearity=torch.tanh,
        output_nonlinearity=None,
    )

    sampler = RaySampler(agents=policy, envs=env, max_episode_length=500)

    # Optimizer class plus its keyword arguments, as taken by
    # OptimizerWrapper.
    sgd_spec = (torch.optim.SGD, {"lr": lr})
    algo = SHARP(
        env_spec=env.spec,
        policy=policy,
        value_function=value_function,
        sampler=sampler,
        discount=0.99,
        center_adv=False,
        policy_optimizer=OptimizerWrapper(sgd_spec, policy),
        neural_baseline=True,
    )

    trainer.setup(algo, env)
    trainer.train(n_epochs=1000, batch_size=10000)


# Launch the experiment with its default arguments. The call sits at module
# level without a ``__main__`` guard, so it runs whenever this file is executed
# or imported.
sgd_halfCheetah()
